This report covers the data analysis, interpretation of the findings and data visualisation of the data set available here
The link to this project can be found on my Github here
In the code below, the original dataset is referred to as ‘df’, and the copy of the dataset used for cleaning is referred to as ‘datacopy’.
# loading the libraries
library(funModeling)
library(dplyr)
library(ggplot2)
library(plotrix)
library(chron)
library(corrplot)
library(sf)
library(mapview)
# Read the raw export; `df` is kept as the untouched reference copy.
df <- read.csv(file = "37-00049_UOF-P_2016_prepped.csv")
# Working copy that the cleaning steps operate on.
datacopy <- data.frame(df)
# Expose the working copy's columns by bare name. attach() copies the columns
# as they are right now, so later changes to `datacopy` are not seen through
# the attached names.
attach(what = datacopy)
# Show the column types and first values of the working copy.
str(object = datacopy)
## 'data.frame': 2384 obs. of 47 variables:
## $ INCIDENT_DATE : chr "OCCURRED_D" "9/3/16" "3/22/16" "5/22/16" ...
## $ INCIDENT_TIME : chr "OCCURRED_T" "4:14:00 AM" "11:00:00 PM" "1:29:00 PM" ...
## $ UOF_NUMBER : chr "UOFNum" "37702" "33413" "34567" ...
## $ OFFICER_ID : chr "CURRENT_BA" "10810" "7706" "11014" ...
## $ OFFICER_GENDER : chr "OffSex" "Male" "Male" "Male" ...
## $ OFFICER_RACE : chr "OffRace" "Black" "White" "Black" ...
## $ OFFICER_HIRE_DATE : chr "HIRE_DT" "5/7/14" "1/8/99" "5/20/15" ...
## $ OFFICER_YEARS_ON_FORCE : chr "INCIDENT_DATE_LESS_" "2" "17" "1" ...
## $ OFFICER_INJURY : chr "OFF_INJURE" "No" "Yes" "No" ...
## $ OFFICER_INJURY_TYPE : chr "OFF_INJURE_DESC" "No injuries noted or visible" "Sprain/Strain" "No injuries noted or visible" ...
## $ OFFICER_HOSPITALIZATION : chr "OFF_HOSPIT" "No" "Yes" "No" ...
## $ SUBJECT_ID : chr "CitNum" "46424" "44324" "45126" ...
## $ SUBJECT_RACE : chr "CitRace" "Black" "Hispanic" "Hispanic" ...
## $ SUBJECT_GENDER : chr "CitSex" "Female" "Male" "Male" ...
## $ SUBJECT_INJURY : chr "CIT_INJURE" "Yes" "No" "No" ...
## $ SUBJECT_INJURY_TYPE : chr "SUBJ_INJURE_DESC" "Non-Visible Injury/Pain" "No injuries noted or visible" "No injuries noted or visible" ...
## $ SUBJECT_WAS_ARRESTED : chr "CIT_ARREST" "Yes" "Yes" "Yes" ...
## $ SUBJECT_DESCRIPTION : chr "CIT_INFL_A" "Mentally unstable" "Mentally unstable" "Unknown" ...
## $ SUBJECT_OFFENSE : chr "CitChargeT" "APOWW" "APOWW" "APOWW" ...
## $ REPORTING_AREA : chr "RA" "2062" "1197" "4153" ...
## $ BEAT : chr "BEAT" "134" "237" "432" ...
## $ SECTOR : chr "SECTOR" "130" "230" "430" ...
## $ DIVISION : chr "DIVISION" "CENTRAL" "NORTHEAST" "SOUTHWEST" ...
## $ LOCATION_DISTRICT : chr "DIST_NAME" "D14" "D9" "D6" ...
## $ STREET_NUMBER : chr "STREET_N" "211" "7647" "716" ...
## $ STREET_NAME : chr "STREET" "Ervay" "Ferguson" "bimebella dr" ...
## $ STREET_DIRECTION : chr "street_g" "N" "NULL" "NULL" ...
## $ STREET_TYPE : chr "street_t" "St." "Rd." "Ln." ...
## $ LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION: chr "Street Address" "211 N ERVAY ST" "7647 FERGUSON RD" "716 BIMEBELLA LN" ...
## $ LOCATION_CITY : chr "City" "Dallas" "Dallas" "Dallas" ...
## $ LOCATION_STATE : chr "State" "TX" "TX" "TX" ...
## $ LOCATION_LATITUDE : chr "Latitude" "32.782205" "32.798978" "32.73971" ...
## $ LOCATION_LONGITUDE : chr "Longitude" "-96.797461" "-96.717493" "-96.92519" ...
## $ INCIDENT_REASON : chr "SERVICE_TY" "Arrest" "Arrest" "Arrest" ...
## $ REASON_FOR_FORCE : chr "UOF_REASON" "Arrest" "Arrest" "Arrest" ...
## $ TYPE_OF_FORCE_USED1 : chr "ForceType1" "Hand/Arm/Elbow Strike" "Joint Locks" "Take Down - Group" ...
## $ TYPE_OF_FORCE_USED2 : chr "ForceType2" "" "" "" ...
## $ TYPE_OF_FORCE_USED3 : chr "ForceType3" "" "" "" ...
## $ TYPE_OF_FORCE_USED4 : chr "ForceType4" "" "" "" ...
## $ TYPE_OF_FORCE_USED5 : chr "ForceType5" "" "" "" ...
## $ TYPE_OF_FORCE_USED6 : chr "ForceType6" "" "" "" ...
## $ TYPE_OF_FORCE_USED7 : chr "ForceType7" "" "" "" ...
## $ TYPE_OF_FORCE_USED8 : chr "ForceType8" "" "" "" ...
## $ TYPE_OF_FORCE_USED9 : chr "ForceType9" "" "" "" ...
## $ TYPE_OF_FORCE_USED10 : chr "ForceType10" "" "" "" ...
## $ NUMBER_EC_CYCLES : chr "Cycles_Num" "NULL" "NULL" "NULL" ...
## $ FORCE_EFFECTIVE : chr "ForceEffec" " Yes" " Yes" " Yes" ...
First, we remove the extra definition row (row 1) at the first index to eliminate redundancy, followed by assigning all the “” and “NULL” values to NA for easier manipulation of the data down the road. We then check for missing values by using the is.na() function.
# Drop the first row, which repeats the source column names as data.
datacopy <- datacopy[-1, ]
# Recode both spellings of "missing" ("NULL" and the empty string) to NA,
# one column at a time; `datacopy[] <-` keeps the data frame's shape and names.
datacopy[] <- lapply(datacopy, function(column) {
  column[column %in% c("NULL", "")] <- NA
  column
})
# Number of missing values per column.
colSums(is.na(datacopy))
## INCIDENT_DATE
## 0
## INCIDENT_TIME
## 10
## UOF_NUMBER
## 0
## OFFICER_ID
## 0
## OFFICER_GENDER
## 0
## OFFICER_RACE
## 0
## OFFICER_HIRE_DATE
## 0
## OFFICER_YEARS_ON_FORCE
## 0
## OFFICER_INJURY
## 0
## OFFICER_INJURY_TYPE
## 0
## OFFICER_HOSPITALIZATION
## 0
## SUBJECT_ID
## 0
## SUBJECT_RACE
## 39
## SUBJECT_GENDER
## 10
## SUBJECT_INJURY
## 0
## SUBJECT_INJURY_TYPE
## 0
## SUBJECT_WAS_ARRESTED
## 0
## SUBJECT_DESCRIPTION
## 76
## SUBJECT_OFFENSE
## 0
## REPORTING_AREA
## 0
## BEAT
## 0
## SECTOR
## 0
## DIVISION
## 0
## LOCATION_DISTRICT
## 0
## STREET_NUMBER
## 0
## STREET_NAME
## 0
## STREET_DIRECTION
## 1728
## STREET_TYPE
## 143
## LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION
## 0
## LOCATION_CITY
## 0
## LOCATION_STATE
## 0
## LOCATION_LATITUDE
## 55
## LOCATION_LONGITUDE
## 55
## INCIDENT_REASON
## 11
## REASON_FOR_FORCE
## 11
## TYPE_OF_FORCE_USED1
## 0
## TYPE_OF_FORCE_USED2
## 747
## TYPE_OF_FORCE_USED3
## 1510
## TYPE_OF_FORCE_USED4
## 1996
## TYPE_OF_FORCE_USED5
## 2226
## TYPE_OF_FORCE_USED6
## 2322
## TYPE_OF_FORCE_USED7
## 2361
## TYPE_OF_FORCE_USED8
## 2378
## TYPE_OF_FORCE_USED9
## 2382
## TYPE_OF_FORCE_USED10
## 2382
## NUMBER_EC_CYCLES
## 2226
## FORCE_EFFECTIVE
## 0
Changing the dates to date format
# Parse the two date columns from text such as "9/3/16" (month/day/2-digit year).
# "%y" is the two-digit-year conversion; "%Y" would read "16" as the year 0016.
# With "%y", values 00-68 map to 20xx and 69-99 map to 19xx.
# The columns are read from `datacopy` itself rather than from the attached
# snapshot, so the result already lines up row-for-row with the data frame and
# no positional trimming is required.
datacopy$INCIDENT_DATE <- as.Date(datacopy$INCIDENT_DATE, format = "%m/%d/%y")
datacopy$OFFICER_HIRE_DATE <- as.Date(datacopy$OFFICER_HIRE_DATE,
                                      format = "%m/%d/%y")
Converting the format from character to POSIXlt
# Parse 12-hour clock strings such as "11:00:00 PM" into POSIXlt.
# "%I" reads the hour on a 12-hour clock and "%p" reads the AM/PM marker;
# "%H" would ignore the marker and turn every PM time into its AM counterpart.
# "%p" is matched against the current locale's AM/PM strings.
# No date is present in the input, so strptime() fills in the current date.
datacopy$INCIDENT_TIME <- strptime(datacopy$INCIDENT_TIME,
                                   format = "%I:%M:%S %p")
Converting desired variables into numeric
# Convert the identifier, area-code and coordinate columns from text to numeric.
for (column_name in c("OFFICER_ID", "SUBJECT_ID", "OFFICER_YEARS_ON_FORCE",
                      "BEAT", "SECTOR", "REPORTING_AREA", "STREET_NUMBER",
                      "LOCATION_LATITUDE", "LOCATION_LONGITUDE")) {
  datacopy[[column_name]] <- as.numeric(datacopy[[column_name]])
}
# Remove the loop variable so the workspace is left as before.
rm(column_name)
Converting desired variables into factors
# Store officer gender as a factor, then print the number of rows per level.
datacopy[["OFFICER_GENDER"]] <- factor(datacopy[["OFFICER_GENDER"]])
summary(datacopy[["OFFICER_GENDER"]])
## Female Male
## 240 2143
# Store officer race as a factor, then print the number of rows per level.
datacopy[["OFFICER_RACE"]] <- factor(datacopy[["OFFICER_RACE"]])
summary(datacopy[["OFFICER_RACE"]])
## American Ind Asian Black Hispanic Other White
## 8 55 341 482 27 1470
# Store subject race as a factor; the summary also reports the NA count.
datacopy[["SUBJECT_RACE"]] <- factor(datacopy[["SUBJECT_RACE"]])
summary(datacopy[["SUBJECT_RACE"]])
## American Ind Asian Black Hispanic Other White
## 1 5 1333 524 11 470
## NA's
## 39
# Store subject gender as a factor; the summary also reports the NA count.
datacopy[["SUBJECT_GENDER"]] <- factor(datacopy[["SUBJECT_GENDER"]])
summary(datacopy[["SUBJECT_GENDER"]])
## Female Male Unknown NA's
## 440 1932 1 10
# Store the officer-injury flag as a factor, then print the count per level.
datacopy[["OFFICER_INJURY"]] <- factor(datacopy[["OFFICER_INJURY"]])
summary(datacopy[["OFFICER_INJURY"]])
## No Yes
## 2149 234
# Store the subject-injury flag as a factor, then print the count per level.
datacopy[["SUBJECT_INJURY"]] <- factor(datacopy[["SUBJECT_INJURY"]])
summary(datacopy[["SUBJECT_INJURY"]])
## No Yes
## 1754 629
# Store the officer-hospitalization flag as a factor, then print the counts.
datacopy[["OFFICER_HOSPITALIZATION"]] <-
  factor(datacopy[["OFFICER_HOSPITALIZATION"]])
summary(datacopy[["OFFICER_HOSPITALIZATION"]])
## No Yes
## 2335 48
# Store the arrest flag as a factor, then print the count per level.
datacopy[["SUBJECT_WAS_ARRESTED"]] <- factor(datacopy[["SUBJECT_WAS_ARRESTED"]])
summary(datacopy[["SUBJECT_WAS_ARRESTED"]])
## No Yes
## 335 2048
# Store the subject description as a factor; the summary also reports NAs.
datacopy[["SUBJECT_DESCRIPTION"]] <- factor(datacopy[["SUBJECT_DESCRIPTION"]])
summary(datacopy[["SUBJECT_DESCRIPTION"]])
## Alchohol Alchohol and unknown drugs
## 382 280
## Animal FD-Animal
## 1 1
## FD-Motor Vehicle FD-Suspect Unarmed
## 2 29
## FD-Suspect w/ Gun FD-Suspect w/ Other Weapon
## 36 25
## FD-Unknown if Armed Marijuana
## 110 50
## Mentally unstable None detected
## 412 297
## Unknown Unknown Drugs
## 364 318
## NA's
## 76
# Store the division as a factor, then print the count per level.
datacopy[["DIVISION"]] <- factor(datacopy[["DIVISION"]])
summary(datacopy[["DIVISION"]])
## CENTRAL NORTH CENTRAL NORTHEAST NORTHWEST SOUTH CENTRAL
## 563 319 341 191 310
## SOUTHEAST SOUTHWEST
## 362 297
# Store the location district as a factor, then print the count per level.
datacopy[["LOCATION_DISTRICT"]] <- factor(datacopy[["LOCATION_DISTRICT"]])
summary(datacopy[["LOCATION_DISTRICT"]])
## D1 D10 D11 D12 D13 D14 D2 D3 D4 D5 D6 D7 D8 D9
## 84 140 160 135 93 313 310 96 222 114 213 231 174 98
# Store the street direction as a factor; the summary also reports NAs.
datacopy[["STREET_DIRECTION"]] <- factor(datacopy[["STREET_DIRECTION"]])
summary(datacopy[["STREET_DIRECTION"]])
## E N S W NA's
## 120 225 187 123 1728
# Store the street type as a factor; the summary also reports NAs.
# NOTE(review): the raw values are not normalised, so spellings that differ
# only by a trailing dot (e.g. "Ave" / "Ave.") become separate levels.
datacopy[["STREET_TYPE"]] <- factor(datacopy[["STREET_TYPE"]])
summary(datacopy[["STREET_TYPE"]])
## Ave Ave. Blvd. Cir. Ct. Dr Dr. Expwy.
## 3 353 160 8 5 1 321 36
## Frwy Frwy. Hwy. Intersec Ln. Pkwy. Pl. Rd.
## 1 52 36 1 219 45 9 400
## Row St St. Trl. Way NA's
## 1 2 557 10 20 143
# Store the incident reason as a factor; the summary also reports NAs.
datacopy[["INCIDENT_REASON"]] <- factor(datacopy[["INCIDENT_REASON"]])
summary(datacopy[["INCIDENT_REASON"]])
## Accidental Discharge Arrest Call for Cover
## 1 1157 131
## Crime in Progress Crowd Control Off-Duty Employment
## 82 5 51
## Off-Duty Incident Other ( In Narrative) Pedestrian Stop
## 12 70 36
## Service Call Suspicious Activity Traffic Stop
## 673 47 93
## Warrant Execution NA's
## 14 11
# Store the reason for force as a factor; the summary also reports NAs.
datacopy[["REASON_FOR_FORCE"]] <- factor(datacopy[["REASON_FOR_FORCE"]])
summary(datacopy[["REASON_FOR_FORCE"]])
## Active Aggression Aggressive Animal Arrest
## 346 4 1050
## Assault to Other Person Barricaded Person Crowd Disbursement
## 65 7 2
## Danger to self or others Detention/Frisk Other
## 347 206 148
## Property Destruction Weapon Display NA's
## 2 195 11
We begin by removing uninformative columns such as LOCATION_STATE and LOCATION_CITY, as both contain a single repeated value throughout the dataset. We also remove STREET_DIRECTION from the dataset, as it is not of great importance with respect to our objective. By reassigning the data frame, we remove the rows which have null values in the longitude and latitude columns; this makes the plotting process a whole lot smoother. Then, using the str() function, we compare the original messy dataset with the new clean one obtained after all the preprocessing.
# Drop the columns that carry no information for this analysis.
datacopy <- select(datacopy, -c(LOCATION_STATE, LOCATION_CITY, STREET_DIRECTION))
# Shorter coordinate names for the mapping code.
datacopy <- rename(datacopy,
                   LON = LOCATION_LONGITUDE,
                   LAT = LOCATION_LATITUDE)
# Keep only rows that have both coordinates. Filtering on the condition itself
# (rather than on hand-picked row positions) stays correct if the input file
# gains, loses or reorders rows.
has_coordinates <- !is.na(datacopy$LAT) & !is.na(datacopy$LON)
datacopy <- datacopy[has_coordinates, ]
rm(has_coordinates)
# Structure of the cleaned data frame.
str(datacopy)
## 'data.frame': 2328 obs. of 44 variables:
## $ INCIDENT_DATE : Date, format: "0016-09-03" "0016-03-22" ...
## $ INCIDENT_TIME : POSIXlt, format: "2022-04-29 04:14:00" "2022-04-29 11:00:00" ...
## $ UOF_NUMBER : chr "37702" "33413" "34567" "36724" ...
## $ OFFICER_ID : num 10810 7706 11014 9855 9881 ...
## $ OFFICER_GENDER : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 1 2 2 2 2 ...
## $ OFFICER_RACE : Factor w/ 6 levels "American Ind",..: 3 6 3 6 6 3 4 3 4 6 ...
## $ OFFICER_HIRE_DATE : Date, format: "0014-05-07" "0099-01-08" ...
## $ OFFICER_YEARS_ON_FORCE : num 2 17 1 7 7 9 4 4 4 3 ...
## $ OFFICER_INJURY : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 1 1 1 ...
## $ OFFICER_INJURY_TYPE : chr "No injuries noted or visible" "Sprain/Strain" "No injuries noted or visible" "No injuries noted or visible" ...
## $ OFFICER_HOSPITALIZATION : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 1 1 1 ...
## $ SUBJECT_ID : num 46424 44324 45126 46549 47555 ...
## $ SUBJECT_RACE : Factor w/ 6 levels "American Ind",..: 3 4 4 6 3 6 3 6 6 3 ...
## $ SUBJECT_GENDER : Factor w/ 3 levels "Female","Male",..: 1 2 2 1 2 1 1 1 2 1 ...
## $ SUBJECT_INJURY : Factor w/ 2 levels "No","Yes": 2 1 1 1 1 1 2 2 2 1 ...
## $ SUBJECT_INJURY_TYPE : chr "Non-Visible Injury/Pain" "No injuries noted or visible" "No injuries noted or visible" "No injuries noted or visible" ...
## $ SUBJECT_WAS_ARRESTED : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 1 2 ...
## $ SUBJECT_DESCRIPTION : Factor w/ 14 levels "Alchohol","Alchohol and unknown drugs",..: 11 11 13 13 12 11 11 11 14 13 ...
## $ SUBJECT_OFFENSE : chr "APOWW" "APOWW" "APOWW" "Assault/FV" ...
## $ REPORTING_AREA : num 2062 1197 4153 1134 2049 ...
## $ BEAT : num 134 237 432 235 132 515 133 354 622 424 ...
## $ SECTOR : num 130 230 430 230 130 510 130 350 620 420 ...
## $ DIVISION : Factor w/ 7 levels "CENTRAL","NORTH CENTRAL",..: 1 3 7 3 1 4 1 6 2 7 ...
## $ LOCATION_DISTRICT : Factor w/ 14 levels "D1","D10","D11",..: 6 14 11 14 6 11 6 13 3 11 ...
## $ STREET_NUMBER : num 211 7647 716 1234 511 ...
## $ STREET_NAME : chr "Ervay" "Ferguson" "bimebella dr" "Peavy" ...
## $ STREET_TYPE : Factor w/ 21 levels "Ave","Ave.","Blvd.",..: 19 16 13 16 19 13 19 7 20 19 ...
## $ LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION: chr "211 N ERVAY ST" "7647 FERGUSON RD" "716 BIMEBELLA LN" "1234 PEAVY RD" ...
## $ LAT : num 32.8 32.8 32.7 32.8 32.8 ...
## $ LON : num -96.8 -96.7 -96.9 -96.7 -96.8 ...
## $ INCIDENT_REASON : Factor w/ 13 levels "Accidental Discharge",..: 2 2 2 2 2 2 2 10 10 2 ...
## $ REASON_FOR_FORCE : Factor w/ 11 levels "Active Aggression",..: 3 3 3 3 3 3 7 3 7 3 ...
## $ TYPE_OF_FORCE_USED1 : chr "Hand/Arm/Elbow Strike" "Joint Locks" "Take Down - Group" "Hand Controlled Escort" ...
## $ TYPE_OF_FORCE_USED2 : chr NA NA NA NA ...
## $ TYPE_OF_FORCE_USED3 : chr NA NA NA NA ...
## $ TYPE_OF_FORCE_USED4 : chr NA NA NA NA ...
## $ TYPE_OF_FORCE_USED5 : chr NA NA NA NA ...
## $ TYPE_OF_FORCE_USED6 : chr NA NA NA NA ...
## $ TYPE_OF_FORCE_USED7 : chr NA NA NA NA ...
## $ TYPE_OF_FORCE_USED8 : chr NA NA NA NA ...
## $ TYPE_OF_FORCE_USED9 : chr NA NA NA NA ...
## $ TYPE_OF_FORCE_USED10 : chr NA NA NA NA ...
## $ NUMBER_EC_CYCLES : chr NA NA NA NA ...
## $ FORCE_EFFECTIVE : chr " Yes" " Yes" " Yes" " Yes" ...
# Structure of the untouched original, for comparison with the cleaned copy.
str(object = df)
## 'data.frame': 2384 obs. of 47 variables:
## $ INCIDENT_DATE : chr "OCCURRED_D" "9/3/16" "3/22/16" "5/22/16" ...
## $ INCIDENT_TIME : chr "OCCURRED_T" "4:14:00 AM" "11:00:00 PM" "1:29:00 PM" ...
## $ UOF_NUMBER : chr "UOFNum" "37702" "33413" "34567" ...
## $ OFFICER_ID : chr "CURRENT_BA" "10810" "7706" "11014" ...
## $ OFFICER_GENDER : chr "OffSex" "Male" "Male" "Male" ...
## $ OFFICER_RACE : chr "OffRace" "Black" "White" "Black" ...
## $ OFFICER_HIRE_DATE : chr "HIRE_DT" "5/7/14" "1/8/99" "5/20/15" ...
## $ OFFICER_YEARS_ON_FORCE : chr "INCIDENT_DATE_LESS_" "2" "17" "1" ...
## $ OFFICER_INJURY : chr "OFF_INJURE" "No" "Yes" "No" ...
## $ OFFICER_INJURY_TYPE : chr "OFF_INJURE_DESC" "No injuries noted or visible" "Sprain/Strain" "No injuries noted or visible" ...
## $ OFFICER_HOSPITALIZATION : chr "OFF_HOSPIT" "No" "Yes" "No" ...
## $ SUBJECT_ID : chr "CitNum" "46424" "44324" "45126" ...
## $ SUBJECT_RACE : chr "CitRace" "Black" "Hispanic" "Hispanic" ...
## $ SUBJECT_GENDER : chr "CitSex" "Female" "Male" "Male" ...
## $ SUBJECT_INJURY : chr "CIT_INJURE" "Yes" "No" "No" ...
## $ SUBJECT_INJURY_TYPE : chr "SUBJ_INJURE_DESC" "Non-Visible Injury/Pain" "No injuries noted or visible" "No injuries noted or visible" ...
## $ SUBJECT_WAS_ARRESTED : chr "CIT_ARREST" "Yes" "Yes" "Yes" ...
## $ SUBJECT_DESCRIPTION : chr "CIT_INFL_A" "Mentally unstable" "Mentally unstable" "Unknown" ...
## $ SUBJECT_OFFENSE : chr "CitChargeT" "APOWW" "APOWW" "APOWW" ...
## $ REPORTING_AREA : chr "RA" "2062" "1197" "4153" ...
## $ BEAT : chr "BEAT" "134" "237" "432" ...
## $ SECTOR : chr "SECTOR" "130" "230" "430" ...
## $ DIVISION : chr "DIVISION" "CENTRAL" "NORTHEAST" "SOUTHWEST" ...
## $ LOCATION_DISTRICT : chr "DIST_NAME" "D14" "D9" "D6" ...
## $ STREET_NUMBER : chr "STREET_N" "211" "7647" "716" ...
## $ STREET_NAME : chr "STREET" "Ervay" "Ferguson" "bimebella dr" ...
## $ STREET_DIRECTION : chr "street_g" "N" "NULL" "NULL" ...
## $ STREET_TYPE : chr "street_t" "St." "Rd." "Ln." ...
## $ LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION: chr "Street Address" "211 N ERVAY ST" "7647 FERGUSON RD" "716 BIMEBELLA LN" ...
## $ LOCATION_CITY : chr "City" "Dallas" "Dallas" "Dallas" ...
## $ LOCATION_STATE : chr "State" "TX" "TX" "TX" ...
## $ LOCATION_LATITUDE : chr "Latitude" "32.782205" "32.798978" "32.73971" ...
## $ LOCATION_LONGITUDE : chr "Longitude" "-96.797461" "-96.717493" "-96.92519" ...
## $ INCIDENT_REASON : chr "SERVICE_TY" "Arrest" "Arrest" "Arrest" ...
## $ REASON_FOR_FORCE : chr "UOF_REASON" "Arrest" "Arrest" "Arrest" ...
## $ TYPE_OF_FORCE_USED1 : chr "ForceType1" "Hand/Arm/Elbow Strike" "Joint Locks" "Take Down - Group" ...
## $ TYPE_OF_FORCE_USED2 : chr "ForceType2" "" "" "" ...
## $ TYPE_OF_FORCE_USED3 : chr "ForceType3" "" "" "" ...
## $ TYPE_OF_FORCE_USED4 : chr "ForceType4" "" "" "" ...
## $ TYPE_OF_FORCE_USED5 : chr "ForceType5" "" "" "" ...
## $ TYPE_OF_FORCE_USED6 : chr "ForceType6" "" "" "" ...
## $ TYPE_OF_FORCE_USED7 : chr "ForceType7" "" "" "" ...
## $ TYPE_OF_FORCE_USED8 : chr "ForceType8" "" "" "" ...
## $ TYPE_OF_FORCE_USED9 : chr "ForceType9" "" "" "" ...
## $ TYPE_OF_FORCE_USED10 : chr "ForceType10" "" "" "" ...
## $ NUMBER_EC_CYCLES : chr "Cycles_Num" "NULL" "NULL" "NULL" ...
## $ FORCE_EFFECTIVE : chr "ForceEffec" " Yes" " Yes" " Yes" ...
Splitting the dataset into numerical and categorical variables for efficient plotting
# Positions (named by column) of every numeric column in the cleaned data.
numericVars <- which(vapply(datacopy, is.numeric, logical(1)))
numericVarNames <- names(numericVars)
cat('There are', length(numericVars), 'numeric variables')
## There are 9 numeric variables
# Positions (named by column) of every factor column in the cleaned data.
catVars <- which(vapply(datacopy, is.factor, logical(1)))
catVarNames <- names(catVars)
cat('The are', length(catVars), 'categorical variables')
## The are 14 categorical variables
# Show the factor columns together with their column positions.
print(catVars)
## OFFICER_GENDER OFFICER_RACE OFFICER_INJURY
## 5 6 9
## OFFICER_HOSPITALIZATION SUBJECT_RACE SUBJECT_GENDER
## 11 13 14
## SUBJECT_INJURY SUBJECT_WAS_ARRESTED SUBJECT_DESCRIPTION
## 15 17 18
## DIVISION LOCATION_DISTRICT STREET_TYPE
## 23 24 27
## INCIDENT_REASON REASON_FOR_FORCE
## 31 32
# Show the numeric columns together with their column positions.
print(numericVars)
## OFFICER_ID OFFICER_YEARS_ON_FORCE SUBJECT_ID
## 4 8 12
## REPORTING_AREA BEAT SECTOR
## 20 21 22
## STREET_NUMBER LAT LON
## 25 29 30
# Split the cleaned data into its factor and numeric columns. The columns are
# selected through the name vectors computed from the data (catVarNames,
# numericVarNames) instead of literal positions, so the split stays correct if
# columns are added, removed or reordered upstream.
cat_df <- datacopy[, catVarNames, drop = FALSE]
num_df <- datacopy[, numericVarNames, drop = FALSE]
# Plot the numeric columns with funModeling::plot_num().
plot_num(num_df)
# Frequency table (and plot) per factor column with funModeling::freq().
freq(cat_df)
## OFFICER_GENDER frequency percentage cumulative_perc
## 1 Male 2089 89.73 89.73
## 2 Female 239 10.27 100.00
## OFFICER_RACE frequency percentage cumulative_perc
## 1 White 1435 61.64 61.64
## 2 Hispanic 475 20.40 82.04
## 3 Black 331 14.22 96.26
## 4 Asian 54 2.32 98.58
## 5 Other 26 1.12 99.70
## 6 American Ind 7 0.30 100.00
## OFFICER_INJURY frequency percentage cumulative_perc
## 1 No 2097 90.08 90.08
## 2 Yes 231 9.92 100.00
## OFFICER_HOSPITALIZATION frequency percentage cumulative_perc
## 1 No 2280 97.94 97.94
## 2 Yes 48 2.06 100.00
## SUBJECT_RACE frequency percentage cumulative_perc
## 1 Black 1301 55.88 55.88
## 2 Hispanic 516 22.16 78.04
## 3 White 460 19.76 97.80
## 4 <NA> 35 1.50 99.30
## 5 Other 11 0.47 99.77
## 6 Asian 4 0.17 99.94
## 7 American Ind 1 0.04 100.00
## SUBJECT_GENDER frequency percentage cumulative_perc
## 1 Male 1893 81.31 81.31
## 2 Female 424 18.21 99.52
## 3 <NA> 10 0.43 99.95
## 4 Unknown 1 0.04 100.00
## SUBJECT_INJURY frequency percentage cumulative_perc
## 1 No 1707 73.32 73.32
## 2 Yes 621 26.68 100.00
## SUBJECT_WAS_ARRESTED frequency percentage cumulative_perc
## 1 Yes 1999 85.87 85.87
## 2 No 329 14.13 100.00
## SUBJECT_DESCRIPTION frequency percentage cumulative_perc
## 1 Mentally unstable 401 17.23 17.23
## 2 Alchohol 378 16.24 33.47
## 3 Unknown 358 15.38 48.85
## 4 Unknown Drugs 312 13.40 62.25
## 5 None detected 291 12.50 74.75
## 6 Alchohol and unknown drugs 271 11.64 86.39
## 7 FD-Unknown if Armed 106 4.55 90.94
## 8 <NA> 75 3.22 94.16
## 9 Marijuana 47 2.02 96.18
## 10 FD-Suspect w/ Gun 33 1.42 97.60
## 11 FD-Suspect Unarmed 28 1.20 98.80
## 12 FD-Suspect w/ Other Weapon 24 1.03 99.83
## 13 FD-Motor Vehicle 2 0.09 99.92
## 14 Animal 1 0.04 99.96
## 15 FD-Animal 1 0.04 100.00
## DIVISION frequency percentage cumulative_perc
## 1 CENTRAL 556 23.88 23.88
## 2 SOUTHEAST 344 14.78 38.66
## 3 NORTHEAST 332 14.26 52.92
## 4 NORTH CENTRAL 309 13.27 66.19
## 5 SOUTH CENTRAL 303 13.02 79.21
## 6 SOUTHWEST 295 12.67 91.88
## 7 NORTHWEST 189 8.12 100.00
## LOCATION_DISTRICT frequency percentage cumulative_perc
## 1 D14 311 13.36 13.36
## 2 D2 305 13.10 26.46
## 3 D4 221 9.49 35.95
## 4 D7 214 9.19 45.14
## 5 D6 212 9.11 54.25
## 6 D8 168 7.22 61.47
## 7 D11 151 6.49 67.96
## 8 D10 135 5.80 73.76
## 9 D12 134 5.76 79.52
## 10 D5 114 4.90 84.42
## 11 D9 94 4.04 88.46
## 12 D3 93 3.99 92.45
## 13 D13 92 3.95 96.40
## 14 D1 84 3.61 100.00
## STREET_TYPE frequency percentage cumulative_perc
## 1 St. 549 23.58 23.58
## 2 Rd. 395 16.97 40.55
## 3 Ave. 351 15.08 55.63
## 4 Dr. 317 13.62 69.25
## 5 Ln. 219 9.41 78.66
## 6 Blvd. 151 6.49 85.15
## 7 <NA> 135 5.80 90.95
## 8 Pkwy. 44 1.89 92.84
## 9 Expwy. 36 1.55 94.39
## 10 Frwy. 35 1.50 95.89
## 11 Hwy. 35 1.50 97.39
## 12 Way 20 0.86 98.25
## 13 Trl. 10 0.43 98.68
## 14 Pl. 9 0.39 99.07
## 15 Cir. 8 0.34 99.41
## 16 Ct. 5 0.21 99.62
## 17 Ave 3 0.13 99.75
## 18 St 2 0.09 99.84
## 19 Dr 1 0.04 99.88
## 20 Frwy 1 0.04 99.92
## 21 Intersec 1 0.04 99.96
## 22 Row 1 0.04 100.00
## INCIDENT_REASON frequency percentage cumulative_perc
## 1 Arrest 1122 48.20 48.20
## 2 Service Call 666 28.61 76.81
## 3 Call for Cover 131 5.63 82.44
## 4 Traffic Stop 90 3.87 86.31
## 5 Crime in Progress 81 3.48 89.79
## 6 Other ( In Narrative) 68 2.92 92.71
## 7 Off-Duty Employment 51 2.19 94.90
## 8 Suspicious Activity 43 1.85 96.75
## 9 Pedestrian Stop 36 1.55 98.30
## 10 Off-Duty Incident 12 0.52 98.82
## 11 Warrant Execution 11 0.47 99.29
## 12 <NA> 11 0.47 99.76
## 13 Crowd Control 5 0.21 99.97
## 14 Accidental Discharge 1 0.04 100.00
## REASON_FOR_FORCE frequency percentage cumulative_perc
## 1 Arrest 1029 44.20 44.20
## 2 Danger to self or others 341 14.65 58.85
## 3 Active Aggression 333 14.30 73.15
## 4 Detention/Frisk 202 8.68 81.83
## 5 Weapon Display 189 8.12 89.95
## 6 Other 143 6.14 96.09
## 7 Assault to Other Person 65 2.79 98.88
## 8 <NA> 11 0.47 99.35
## 9 Barricaded Person 7 0.30 99.65
## 10 Aggressive Animal 4 0.17 99.82
## 11 Crowd Disbursement 2 0.09 99.91
## 12 Property Destruction 2 0.09 100.00
## [1] "Variables processed: OFFICER_GENDER, OFFICER_RACE, OFFICER_INJURY, OFFICER_HOSPITALIZATION, SUBJECT_RACE, SUBJECT_GENDER, SUBJECT_INJURY, SUBJECT_WAS_ARRESTED, SUBJECT_DESCRIPTION, DIVISION, LOCATION_DISTRICT, STREET_TYPE, INCIDENT_REASON, REASON_FOR_FORCE"
Plotting the variables as pie charts to visualize the assorted variables
# Pie charts: one full-width stacked bar per plot, wrapped into polar
# coordinates along the count axis so each fill level becomes a slice.

# Officer race
ggplot(data = datacopy, mapping = aes(x = factor(1), fill = OFFICER_RACE)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")

# Officer gender
ggplot(data = datacopy, mapping = aes(x = factor(1), fill = OFFICER_GENDER)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")

# Subject race
ggplot(data = datacopy, mapping = aes(x = factor(1), fill = SUBJECT_RACE)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")

# Subject gender
ggplot(data = datacopy, mapping = aes(x = factor(1), fill = SUBJECT_GENDER)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")
Plotting the variables as hue filled bar plots
# Bar charts of counts; where a fill variable is mapped, bars are stacked by it
# and coloured with the discrete viridis palette.

# Incidents by officer years on the force (no fill variable is mapped here).
ggplot(datacopy, aes(x = OFFICER_YEARS_ON_FORCE)) +
  geom_bar() +
  scale_fill_viridis_d()

# Arrest outcome, split by subject race.
ggplot(datacopy, aes(x = SUBJECT_WAS_ARRESTED, fill = SUBJECT_RACE)) +
  geom_bar() +
  scale_fill_viridis_d()

# Arrest outcome, split by officer race.
ggplot(datacopy, aes(x = SUBJECT_WAS_ARRESTED, fill = OFFICER_RACE)) +
  geom_bar() +
  scale_fill_viridis_d()

# Subject race, split by arrest outcome.
ggplot(datacopy, aes(x = SUBJECT_RACE, fill = SUBJECT_WAS_ARRESTED)) +
  geom_bar() +
  scale_fill_viridis_d()

# Arrest outcome, split by officer injury.
ggplot(datacopy, aes(x = SUBJECT_WAS_ARRESTED, fill = OFFICER_INJURY)) +
  geom_bar() +
  scale_fill_viridis_d()
Visualising the data based on the longitudinal and latitudinal coordinates and other columns acting as hue (z-axis)
# Re-check the column names and types before mapping (LON / LAT are numeric).
str(object = datacopy)
## 'data.frame': 2328 obs. of 44 variables:
## $ INCIDENT_DATE : Date, format: "0016-09-03" "0016-03-22" ...
## $ INCIDENT_TIME : POSIXlt, format: "2022-04-29 04:14:00" "2022-04-29 11:00:00" ...
## $ UOF_NUMBER : chr "37702" "33413" "34567" "36724" ...
## $ OFFICER_ID : num 10810 7706 11014 9855 9881 ...
## $ OFFICER_GENDER : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 1 2 2 2 2 ...
## $ OFFICER_RACE : Factor w/ 6 levels "American Ind",..: 3 6 3 6 6 3 4 3 4 6 ...
## $ OFFICER_HIRE_DATE : Date, format: "0014-05-07" "0099-01-08" ...
## $ OFFICER_YEARS_ON_FORCE : num 2 17 1 7 7 9 4 4 4 3 ...
## $ OFFICER_INJURY : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 1 1 1 ...
## $ OFFICER_INJURY_TYPE : chr "No injuries noted or visible" "Sprain/Strain" "No injuries noted or visible" "No injuries noted or visible" ...
## $ OFFICER_HOSPITALIZATION : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 1 1 1 ...
## $ SUBJECT_ID : num 46424 44324 45126 46549 47555 ...
## $ SUBJECT_RACE : Factor w/ 6 levels "American Ind",..: 3 4 4 6 3 6 3 6 6 3 ...
## $ SUBJECT_GENDER : Factor w/ 3 levels "Female","Male",..: 1 2 2 1 2 1 1 1 2 1 ...
## $ SUBJECT_INJURY : Factor w/ 2 levels "No","Yes": 2 1 1 1 1 1 2 2 2 1 ...
## $ SUBJECT_INJURY_TYPE : chr "Non-Visible Injury/Pain" "No injuries noted or visible" "No injuries noted or visible" "No injuries noted or visible" ...
## $ SUBJECT_WAS_ARRESTED : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 1 2 ...
## $ SUBJECT_DESCRIPTION : Factor w/ 14 levels "Alchohol","Alchohol and unknown drugs",..: 11 11 13 13 12 11 11 11 14 13 ...
## $ SUBJECT_OFFENSE : chr "APOWW" "APOWW" "APOWW" "Assault/FV" ...
## $ REPORTING_AREA : num 2062 1197 4153 1134 2049 ...
## $ BEAT : num 134 237 432 235 132 515 133 354 622 424 ...
## $ SECTOR : num 130 230 430 230 130 510 130 350 620 420 ...
## $ DIVISION : Factor w/ 7 levels "CENTRAL","NORTH CENTRAL",..: 1 3 7 3 1 4 1 6 2 7 ...
## $ LOCATION_DISTRICT : Factor w/ 14 levels "D1","D10","D11",..: 6 14 11 14 6 11 6 13 3 11 ...
## $ STREET_NUMBER : num 211 7647 716 1234 511 ...
## $ STREET_NAME : chr "Ervay" "Ferguson" "bimebella dr" "Peavy" ...
## $ STREET_TYPE : Factor w/ 21 levels "Ave","Ave.","Blvd.",..: 19 16 13 16 19 13 19 7 20 19 ...
## $ LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION: chr "211 N ERVAY ST" "7647 FERGUSON RD" "716 BIMEBELLA LN" "1234 PEAVY RD" ...
## $ LAT : num 32.8 32.8 32.7 32.8 32.8 ...
## $ LON : num -96.8 -96.7 -96.9 -96.7 -96.8 ...
## $ INCIDENT_REASON : Factor w/ 13 levels "Accidental Discharge",..: 2 2 2 2 2 2 2 10 10 2 ...
## $ REASON_FOR_FORCE : Factor w/ 11 levels "Active Aggression",..: 3 3 3 3 3 3 7 3 7 3 ...
## $ TYPE_OF_FORCE_USED1 : chr "Hand/Arm/Elbow Strike" "Joint Locks" "Take Down - Group" "Hand Controlled Escort" ...
## $ TYPE_OF_FORCE_USED2 : chr NA NA NA NA ...
## $ TYPE_OF_FORCE_USED3 : chr NA NA NA NA ...
## $ TYPE_OF_FORCE_USED4 : chr NA NA NA NA ...
## $ TYPE_OF_FORCE_USED5 : chr NA NA NA NA ...
## $ TYPE_OF_FORCE_USED6 : chr NA NA NA NA ...
## $ TYPE_OF_FORCE_USED7 : chr NA NA NA NA ...
## $ TYPE_OF_FORCE_USED8 : chr NA NA NA NA ...
## $ TYPE_OF_FORCE_USED9 : chr NA NA NA NA ...
## $ TYPE_OF_FORCE_USED10 : chr NA NA NA NA ...
## $ NUMBER_EC_CYCLES : chr NA NA NA NA ...
## $ FORCE_EFFECTIVE : chr " Yes" " Yes" " Yes" " Yes" ...
# Interactive maps of the incidents: points are placed from the LON / LAT
# columns (CRS 4326) and coloured by the column named in `zcol`.
# NOTE(review): confirm the "Stamen.Toner" basemap provider is still available
# in the installed mapview / leaflet versions.

# Coloured by police division.
mapview(datacopy,
        zcol = "DIVISION",
        xcol = "LON", ycol = "LAT", crs = 4326,
        map.types = "Stamen.Toner",
        grid = FALSE)

# Coloured by location district.
mapview(datacopy,
        zcol = "LOCATION_DISTRICT",
        xcol = "LON", ycol = "LAT", crs = 4326,
        map.types = "Stamen.Toner",
        grid = FALSE)

# Coloured by officer race.
mapview(datacopy,
        zcol = "OFFICER_RACE",
        xcol = "LON", ycol = "LAT", crs = 4326,
        map.types = "Stamen.Toner",
        grid = FALSE)